### setwd("/Users/seantrott/Dropbox/UCSD/Research/NLMs/llm_clt/src/analysis/")
### Read in all data
### Glasgow concreteness: resampled correlation results (human/centaur samples).
### NOTE(review): relative paths assume the working directory is src/analysis/.
### NOTE(review): `df_all_results` is reused (overwritten) by later sections.
df_all_results = read_csv("../../data/processed/gc_results.csv")
## Rows: 33214 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): k, combo_index, list_num, n, pearson_centaur1, spearman_centaur1, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_all_results)
## [1] 33214
### How many per list?
table(df_all_results$list_num)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 126 1022 2046 510 2046 3086 2046 3086 3086 510 2046 3086 3086 3086 2046 254
## 17
## 2046
### LLM data
### Per-list LLM (GPT-4) correlations with the concreteness norms.
df_llm = read_csv("../../data/processed/gc_llm_corrs.csv")
## Rows: 17 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): list_num, n, pearson_llm, spearman_llm
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Mean and SD of the LLM's Spearman correlation across lists.
### Fixed: summary column was misspelled "m_spearmam".
df_llm %>%
  summarise(m_spearman = mean(spearman_llm),
            sd_spearman = sd(spearman_llm))
## # A tibble: 1 × 2
## m_spearman sd_spearman
## <dbl> <dbl>
## 1 0.803 0.0413
### Single-participant (k = 1) samples only.
df_individuals = df_all_results %>%
filter(k == 1)
### Distribution of individual-participant correlations, with the mean LLM
### correlation overlaid as a dotted blue vertical line.
df_individuals %>%
ggplot(aes(x = spearman_ppt)) +
geom_histogram(alpha = .5) +
labs(x="Correlation with Original Concreteness Norms", y="Count") +
theme_minimal() +
geom_vline(xintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue", alpha = .8) +
theme(text = element_text(size = 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Pivot
### Reshape to long format: one row per (k, combo, list) x sample type, with
### the Spearman correlation in a single `correlation` column. The
### "spearman_" prefix is stripped during the pivot, and the participant
### ("ppt") samples are relabeled "Human".
df_results_long = df_all_results %>%
  select(spearman_centaur1, spearman_centaur2, spearman_ppt,
         k, combo_index, list_num) %>%
  pivot_longer(cols = starts_with("spearman_"),
               names_to = "sample_type",
               names_prefix = "spearman_",
               values_to = "correlation") %>%
  mutate(sample_type = if_else(sample_type == "ppt", "Human", sample_type))
### Visualize
### Mean correlation (+/- SE) per sample size k and sample type.
### `.groups = "drop_last"` reproduces the default behavior (result grouped
### by `k`) while silencing the summarise() grouping message.
df_results_summ = df_results_long %>%
  group_by(k, sample_type) %>%
  summarize(m_corr = mean(correlation),
            sd_corr = sd(correlation),
            se_corr = sd(correlation)/sqrt(n()),
            .groups = "drop_last") %>%
  mutate(sample_type = toTitleCase(sample_type))
### Mean correlation by number of participants (k) and sample type.
### Error bars are +/- 2 SE; dotted blue line is the mean LLM correlation.
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_results_summ %>%
ggplot(aes(x = k, y = m_corr)) +
geom_point(aes(color=factor(sample_type), shape = factor(sample_type)), size=3, alpha = .5) + # Add points
geom_line(aes(color=factor(sample_type))) + # Connect points with lines
geom_errorbar(aes(ymin=m_corr-se_corr * 2, ymax=m_corr+se_corr * 2, width=0.2, color = factor(sample_type))) +
labs(x="Number of Participants", y="Correlation with Concreteness Norms", color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_hline(yintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue",
alpha = .8) +
theme(text = element_text(size = 15),
legend.position="bottom") +
scale_color_manual(values = my_colors)
Here, we visualize projected differences in quality and cost of a human sample vs. GPT-4.
For human cost, we assume: a pay rate of $12/hour and 5 seconds per judgment (i.e., $12 per 720 judgments).
For GPT-4 cost, we assume: $0.0003 per 1K sampled (prompt) tokens and $0.0006 per 1K generated tokens, with 20 sampled and 10 generated tokens per judgment.
### Human assumptions: $12/hour, 5 seconds per judgment.
RATE = 12
SECONDS_PER_JUDGMENT = 5
HUMAN_CPJ = RATE / (3600/SECONDS_PER_JUDGMENT)  # dollars per human judgment
### GPT-4 assumptions: prices are dollars per 1K tokens — TODO confirm against
### the pricing in effect when the data were collected.
COST_PER_1K_SAMPLED = 0.0003
COST_PER_1K_GENERATED = 0.0006
NUM_SAMPLED_PER_JUDGMENT = 20
NUM_GENERATED_PER_JUDGMENT = 10
### Fixed: the generated-token term carried a stray `1000 *` factor, inflating
### the generated cost 1000x relative to the (tokens/1000) * price-per-1K form
### used for the sampled term. Both terms now use the same formula.
GPT_CPJ = (NUM_SAMPLED_PER_JUDGMENT / 1000) * COST_PER_1K_SAMPLED +
  (NUM_GENERATED_PER_JUDGMENT / 1000) * COST_PER_1K_GENERATED
### Visualize
### Quality ratio: sample correlation relative to the LLM's correlation.
### Cost ratio: sample cost relative to GPT-4's cost, per set of k judgments.
df_costs_summ = df_all_results %>%
  mutate(ratio_human_quality = spearman_ppt / spearman_llm) %>%
  mutate(ratio_human_cost = (k * HUMAN_CPJ) / GPT_CPJ) %>%
  mutate(ratio_centaur1_quality = spearman_centaur1 / spearman_llm) %>%
  mutate(ratio_centaur1_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
  mutate(ratio_centaur2_quality = spearman_centaur2 / spearman_llm) %>%
  mutate(ratio_centaur2_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
  group_by(k) %>%
  ### Fixed: the *_ratio_cost "SE" columns were plain sd(); divide by sqrt(n())
  ### so they are standard errors, consistent with the quality SEs. (Cost is
  ### deterministic given k, so these are 0 either way within a k group.)
  summarise(m_human_ratio_quality = mean(ratio_human_quality),
            se_human_ratio_quality = sd(ratio_human_quality)/sqrt(n()),
            m_human_ratio_cost = mean(ratio_human_cost),
            se_human_ratio_cost = sd(ratio_human_cost)/sqrt(n()),
            ### Centaur1
            m_centaur1_ratio_quality = mean(ratio_centaur1_quality),
            se_centaur1_ratio_quality = sd(ratio_centaur1_quality)/sqrt(n()),
            m_centaur1_ratio_cost = mean(ratio_centaur1_cost),
            se_centaur1_ratio_cost = sd(ratio_centaur1_cost)/sqrt(n()),
            ### Centaur2
            m_centaur2_ratio_quality = mean(ratio_centaur2_quality),
            se_centaur2_ratio_quality = sd(ratio_centaur2_quality)/sqrt(n()),
            m_centaur2_ratio_cost = mean(ratio_centaur2_cost),
            se_centaur2_ratio_cost = sd(ratio_centaur2_cost)/sqrt(n()))
### Reshape the k-wise cost/quality summary to one row per (k, sample type),
### with columns m_quality / se_quality / m_cost / se_cost.
df_costs_summ_long <- df_costs_summ %>%
  pivot_longer(cols = -k, names_to = "variable", values_to = "value") %>%
  mutate(
    sample_type = str_extract(variable, "human|centaur1|centaur2"),
    # Rebuild "m_quality" / "se_cost" etc. from the statistic prefix and the
    # measure suffix of names like "m_human_ratio_quality".
    metric = paste(str_extract(variable, "^(m|se)"),
                   str_extract(variable, "(quality|cost)$"),
                   sep = "_")
  ) %>%
  select(-variable) %>%
  pivot_wider(names_from = metric, values_from = value) %>%
  mutate(sample_type = toTitleCase(sample_type))
### Cost ratio vs. quality ratio per sample type; dotted line marks quality
### parity with the LLM (ratio = 1).
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_costs_summ_long %>%
ggplot(aes(x = m_quality, y = m_cost, color = sample_type, shape = sample_type)) +
geom_point(size=3, alpha = .5) + # Add points
geom_line(alpha = .6) + # Connect points with lines
labs(x="Quality Ratio", y="Cost Ratio",
color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_vline(xintercept = 1, linetype = "dotted") +
theme(text = element_text(size = 15),
legend.position="bottom") +
scale_color_manual(values = my_colors)
### Read in all data
### RAW-C: resampled correlation results (overwrites `df_all_results` from
### the concreteness section above).
df_all_results = read_csv("../../data/processed/rawc_results.csv")
## Rows: 21340 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): k, combo_index, list_num, n, pearson_centaur1, spearman_centaur1, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_all_results)
## [1] 21340
### How many per list?
table(df_all_results$list_num)
##
## 1 2 3 4 5 6 7 8
## 3086 3754 510 4438 1022 3754 3754 1022
### LLM data
### Per-list LLM (GPT-4) correlations with the RAW-C norms.
df_llm = read_csv("../../data/processed/rawc_llm_corrs.csv")
## Rows: 8 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): list_num, n, pearson_llm, spearman_llm
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Mean and SD of the LLM's Spearman correlation across lists.
### Fixed: summary column was misspelled "m_spearmam".
df_llm %>%
  summarise(m_spearman = mean(spearman_llm),
            sd_spearman = sd(spearman_llm))
## # A tibble: 1 × 2
## m_spearman sd_spearman
## <dbl> <dbl>
## 1 0.804 0.0205
### Single-participant (k = 1) samples only.
df_individuals = df_all_results %>%
filter(k == 1)
### Mean and SD of individual-participant correlations.
mean(df_individuals$spearman_ppt)
## [1] 0.7762202
sd(df_individuals$spearman_ppt)
## [1] 0.103045
### Distribution of individual-participant correlations, with the mean LLM
### correlation overlaid as a dotted blue vertical line.
df_individuals %>%
ggplot(aes(x = spearman_ppt)) +
geom_histogram(alpha = .5) +
labs(x="Correlation with Original RAW-C Norms", y="Count") +
theme_minimal() +
geom_vline(xintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue",
size = 1.2, alpha = .5) +
theme(text = element_text(size = 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Pivot
### Reshape to long format: one row per (k, combo, list) x sample type, with
### the Spearman correlation in a single `correlation` column. The
### "spearman_" prefix is stripped during the pivot, and the participant
### ("ppt") samples are relabeled "Human".
df_results_long = df_all_results %>%
  select(spearman_centaur1, spearman_centaur2, spearman_ppt,
         k, combo_index, list_num) %>%
  pivot_longer(cols = starts_with("spearman_"),
               names_to = "sample_type",
               names_prefix = "spearman_",
               values_to = "correlation") %>%
  mutate(sample_type = if_else(sample_type == "ppt", "Human", sample_type))
### Visualize
### Mean correlation (+/- SE) per sample size k and sample type.
### `.groups = "drop_last"` reproduces the default behavior (result grouped
### by `k`) while silencing the summarise() grouping message.
df_results_summ = df_results_long %>%
  group_by(k, sample_type) %>%
  summarize(m_corr = mean(correlation),
            sd_corr = sd(correlation),
            se_corr = sd(correlation)/sqrt(n()),
            .groups = "drop_last") %>%
  mutate(sample_type = toTitleCase(sample_type))
### Mean correlation by number of participants (k) and sample type.
### Error bars are +/- 2 SE; dotted blue line is the mean LLM correlation.
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_results_summ %>%
ggplot(aes(x = k, y = m_corr)) +
geom_point(aes(color=factor(sample_type),shape = factor(sample_type)), size =3, alpha = .6) + # Add points
geom_line(aes(color=factor(sample_type)), alpha = .5) + # Connect points with lines
geom_errorbar(aes(ymin=m_corr-se_corr * 2, ymax=m_corr+se_corr * 2, width=0.2, color = factor(sample_type)), alpha = .5) +
labs(x="Number of Participants", y="Spearman's Rho", color = "Sample Type",
shape = "Sample Type") +
theme_minimal() +
geom_hline(yintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue",
size = 1.2, alpha = .5) +
theme(text = element_text(size = 15),
legend.position="bottom")+
scale_color_manual(values = my_colors)
For human cost, we assume: a pay rate of $12/hour and 5 seconds per judgment (i.e., $12 per 720 judgments).
For GPT-4 cost, we assume: $0.0003 per 1K sampled (prompt) tokens and $0.0006 per 1K generated tokens, with 20 sampled and 10 generated tokens per judgment.
### Human assumptions: $12/hour, 5 seconds per judgment.
RATE = 12
SECONDS_PER_JUDGMENT = 5
HUMAN_CPJ = RATE / (3600/SECONDS_PER_JUDGMENT)  # dollars per human judgment
### GPT-4 assumptions: prices are dollars per 1K tokens — TODO confirm against
### the pricing in effect when the data were collected.
COST_PER_1K_SAMPLED = 0.0003
COST_PER_1K_GENERATED = 0.0006
NUM_SAMPLED_PER_JUDGMENT = 20
NUM_GENERATED_PER_JUDGMENT = 10
### Fixed: the generated-token term carried a stray `1000 *` factor, inflating
### the generated cost 1000x relative to the (tokens/1000) * price-per-1K form
### used for the sampled term. Both terms now use the same formula.
GPT_CPJ = (NUM_SAMPLED_PER_JUDGMENT / 1000) * COST_PER_1K_SAMPLED +
  (NUM_GENERATED_PER_JUDGMENT / 1000) * COST_PER_1K_GENERATED
### Visualize
### Quality ratio: sample correlation relative to the LLM's correlation.
### Cost ratio: sample cost relative to GPT-4's cost, per set of k judgments.
df_costs_summ = df_all_results %>%
  mutate(ratio_human_quality = spearman_ppt / spearman_llm) %>%
  mutate(ratio_human_cost = (k * HUMAN_CPJ) / GPT_CPJ) %>%
  mutate(ratio_centaur1_quality = spearman_centaur1 / spearman_llm) %>%
  mutate(ratio_centaur1_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
  mutate(ratio_centaur2_quality = spearman_centaur2 / spearman_llm) %>%
  mutate(ratio_centaur2_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
  group_by(k) %>%
  ### Fixed: the *_ratio_cost "SE" columns were plain sd(); divide by sqrt(n())
  ### so they are standard errors, consistent with the quality SEs. (Cost is
  ### deterministic given k, so these are 0 either way within a k group.)
  summarise(m_human_ratio_quality = mean(ratio_human_quality),
            se_human_ratio_quality = sd(ratio_human_quality)/sqrt(n()),
            m_human_ratio_cost = mean(ratio_human_cost),
            se_human_ratio_cost = sd(ratio_human_cost)/sqrt(n()),
            ### Centaur1
            m_centaur1_ratio_quality = mean(ratio_centaur1_quality),
            se_centaur1_ratio_quality = sd(ratio_centaur1_quality)/sqrt(n()),
            m_centaur1_ratio_cost = mean(ratio_centaur1_cost),
            se_centaur1_ratio_cost = sd(ratio_centaur1_cost)/sqrt(n()),
            ### Centaur2
            m_centaur2_ratio_quality = mean(ratio_centaur2_quality),
            se_centaur2_ratio_quality = sd(ratio_centaur2_quality)/sqrt(n()),
            m_centaur2_ratio_cost = mean(ratio_centaur2_cost),
            se_centaur2_ratio_cost = sd(ratio_centaur2_cost)/sqrt(n()))
### Reshape the k-wise cost/quality summary to one row per (k, sample type),
### with columns m_quality / se_quality / m_cost / se_cost.
df_costs_summ_long <- df_costs_summ %>%
  pivot_longer(cols = -k, names_to = "variable", values_to = "value") %>%
  mutate(
    sample_type = str_extract(variable, "human|centaur1|centaur2"),
    # Rebuild "m_quality" / "se_cost" etc. from the statistic prefix and the
    # measure suffix of names like "m_human_ratio_quality".
    metric = paste(str_extract(variable, "^(m|se)"),
                   str_extract(variable, "(quality|cost)$"),
                   sep = "_")
  ) %>%
  select(-variable) %>%
  pivot_wider(names_from = metric, values_from = value) %>%
  mutate(sample_type = toTitleCase(sample_type))
### Cost ratio vs. quality ratio per sample type; dotted line marks quality
### parity with the LLM (ratio = 1).
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_costs_summ_long %>%
ggplot(aes(x = m_quality, y = m_cost, color = sample_type, shape = sample_type)) +
geom_point(size=3, alpha = .5) + # Add points
geom_line(alpha = .6) + # Connect points with lines
# geom_smooth(alpha = .2) +
labs(x="Quality Ratio", y="Cost Ratio",
color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_vline(xintercept = 1, linetype = "dotted") +
theme(text = element_text(size = 15),
legend.position="bottom") +
scale_color_manual(values = my_colors)
### setwd("/Users/seantrott/Dropbox/UCSD/Research/NLMs/llm_clt/src/analysis/")
### Read in all data
### Glasgow valence: resampled correlation results (overwrites `df_all_results`).
df_all_results = read_csv("../../data/processed/gc_valence_results.csv")
## Rows: 14294 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): k, combo_index, list_num, n, pearson_centaur1, spearman_centaur1, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_all_results)
## [1] 14294
### How many per list?
table(df_all_results$list_num)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 62 126 2046 62 510 5150 3086 254 254 510 510 1022 126 254 62 6
## 17
## 254
### LLM data
### Per-list LLM (GPT-4) correlations with the valence norms.
df_llm = read_csv("../../data/processed/gc_valence_llm_corrs.csv")
## Rows: 17 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): list_num, n, pearson_llm, spearman_llm
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Mean and SD of the LLM's Spearman correlation across lists.
### Fixed: summary column was misspelled "m_spearmam".
df_llm %>%
  summarise(m_spearman = mean(spearman_llm),
            sd_spearman = sd(spearman_llm))
## # A tibble: 1 × 2
## m_spearman sd_spearman
## <dbl> <dbl>
## 1 0.750 0.0678
### Single-participant (k = 1) samples only.
df_individuals = df_all_results %>%
filter(k == 1)
### Distribution of individual-participant correlations, with the mean LLM
### correlation overlaid as a dotted blue vertical line.
df_individuals %>%
ggplot(aes(x = spearman_ppt)) +
geom_histogram(alpha = .5) +
labs(x="Correlation with Original Valence Norms", y="Count") +
theme_minimal() +
geom_vline(xintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue", alpha = .8) +
theme(text = element_text(size = 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Pivot
### Reshape to long format: one row per (k, combo, list) x sample type, with
### the Spearman correlation in a single `correlation` column. The
### "spearman_" prefix is stripped during the pivot, and the participant
### ("ppt") samples are relabeled "Human".
df_results_long = df_all_results %>%
  select(spearman_centaur1, spearman_centaur2, spearman_ppt,
         k, combo_index, list_num) %>%
  pivot_longer(cols = starts_with("spearman_"),
               names_to = "sample_type",
               names_prefix = "spearman_",
               values_to = "correlation") %>%
  mutate(sample_type = if_else(sample_type == "ppt", "Human", sample_type))
### Visualize
### Mean correlation (+/- SE) per sample size k and sample type.
### `.groups = "drop_last"` reproduces the default behavior (result grouped
### by `k`) while silencing the summarise() grouping message.
df_results_summ = df_results_long %>%
  group_by(k, sample_type) %>%
  summarize(m_corr = mean(correlation),
            sd_corr = sd(correlation),
            se_corr = sd(correlation)/sqrt(n()),
            .groups = "drop_last") %>%
  mutate(sample_type = toTitleCase(sample_type))
### Mean correlation by number of participants (k) and sample type.
### Error bars are +/- 2 SE; dotted blue line is the mean LLM correlation.
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_results_summ %>%
ggplot(aes(x = k, y = m_corr)) +
geom_point(aes(color=factor(sample_type), shape = factor(sample_type)), size=3, alpha = .5) + # Add points
geom_line(aes(color=factor(sample_type))) + # Connect points with lines
geom_errorbar(aes(ymin=m_corr-se_corr * 2, ymax=m_corr+se_corr * 2, width=0.2, color = factor(sample_type))) +
labs(x="Number of Participants", y="Correlation with Valence Norms", color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_hline(yintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue",
alpha = .8) +
theme(text = element_text(size = 15),
legend.position="bottom") +
scale_color_manual(values = my_colors)
Here, we visualize projected differences in quality and cost of a human sample vs. GPT-4.
For human cost, we assume: a pay rate of $12/hour and 5 seconds per judgment (i.e., $12 per 720 judgments).
For GPT-4 cost, we assume: $0.0003 per 1K sampled (prompt) tokens and $0.0006 per 1K generated tokens, with 20 sampled and 10 generated tokens per judgment.
### Human assumptions: $12/hour, 5 seconds per judgment.
RATE = 12
SECONDS_PER_JUDGMENT = 5
HUMAN_CPJ = RATE / (3600/SECONDS_PER_JUDGMENT)  # dollars per human judgment
### GPT-4 assumptions: prices are dollars per 1K tokens — TODO confirm against
### the pricing in effect when the data were collected.
COST_PER_1K_SAMPLED = 0.0003
COST_PER_1K_GENERATED = 0.0006
NUM_SAMPLED_PER_JUDGMENT = 20
NUM_GENERATED_PER_JUDGMENT = 10
### Fixed: the generated-token term carried a stray `1000 *` factor, inflating
### the generated cost 1000x relative to the (tokens/1000) * price-per-1K form
### used for the sampled term. Both terms now use the same formula.
GPT_CPJ = (NUM_SAMPLED_PER_JUDGMENT / 1000) * COST_PER_1K_SAMPLED +
  (NUM_GENERATED_PER_JUDGMENT / 1000) * COST_PER_1K_GENERATED
### Visualize
### Quality ratio: sample correlation relative to the LLM's correlation.
### Cost ratio: sample cost relative to GPT-4's cost, per set of k judgments.
df_costs_summ = df_all_results %>%
  mutate(ratio_human_quality = spearman_ppt / spearman_llm) %>%
  mutate(ratio_human_cost = (k * HUMAN_CPJ) / GPT_CPJ) %>%
  mutate(ratio_centaur1_quality = spearman_centaur1 / spearman_llm) %>%
  mutate(ratio_centaur1_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
  mutate(ratio_centaur2_quality = spearman_centaur2 / spearman_llm) %>%
  mutate(ratio_centaur2_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
  group_by(k) %>%
  ### Fixed: the *_ratio_cost "SE" columns were plain sd(); divide by sqrt(n())
  ### so they are standard errors, consistent with the quality SEs. (Cost is
  ### deterministic given k, so these are 0 either way within a k group.)
  summarise(m_human_ratio_quality = mean(ratio_human_quality),
            se_human_ratio_quality = sd(ratio_human_quality)/sqrt(n()),
            m_human_ratio_cost = mean(ratio_human_cost),
            se_human_ratio_cost = sd(ratio_human_cost)/sqrt(n()),
            ### Centaur1
            m_centaur1_ratio_quality = mean(ratio_centaur1_quality),
            se_centaur1_ratio_quality = sd(ratio_centaur1_quality)/sqrt(n()),
            m_centaur1_ratio_cost = mean(ratio_centaur1_cost),
            se_centaur1_ratio_cost = sd(ratio_centaur1_cost)/sqrt(n()),
            ### Centaur2
            m_centaur2_ratio_quality = mean(ratio_centaur2_quality),
            se_centaur2_ratio_quality = sd(ratio_centaur2_quality)/sqrt(n()),
            m_centaur2_ratio_cost = mean(ratio_centaur2_cost),
            se_centaur2_ratio_cost = sd(ratio_centaur2_cost)/sqrt(n()))
### Reshape the k-wise cost/quality summary to one row per (k, sample type),
### with columns m_quality / se_quality / m_cost / se_cost.
df_costs_summ_long <- df_costs_summ %>%
  pivot_longer(cols = -k, names_to = "variable", values_to = "value") %>%
  mutate(
    sample_type = str_extract(variable, "human|centaur1|centaur2"),
    # Rebuild "m_quality" / "se_cost" etc. from the statistic prefix and the
    # measure suffix of names like "m_human_ratio_quality".
    metric = paste(str_extract(variable, "^(m|se)"),
                   str_extract(variable, "(quality|cost)$"),
                   sep = "_")
  ) %>%
  select(-variable) %>%
  pivot_wider(names_from = metric, values_from = value) %>%
  mutate(sample_type = toTitleCase(sample_type))
### Cost ratio vs. quality ratio per sample type; dotted line marks quality
### parity with the LLM (ratio = 1).
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_costs_summ_long %>%
ggplot(aes(x = m_quality, y = m_cost, color = sample_type, shape = sample_type)) +
geom_point(size=3, alpha = .5) + # Add points
geom_line(alpha = .6) + # Connect points with lines
labs(x="Quality Ratio", y="Cost Ratio",
color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_vline(xintercept = 1, linetype = "dotted") +
theme(text = element_text(size = 15),
legend.position="bottom") +
scale_color_manual(values = my_colors)
Here, we perform a supplementary analysis to investigate list-wise variation in the correlation.
Checking individual lists for supplementary analysis:
### List-wise supplementary analysis: mean correlation per (k, list) for the
### Human samples only.
### `.groups = "drop_last"` reproduces the default behavior (result grouped by
### `k`, `sample_type`) while silencing the summarise() grouping message.
df_results_summ = df_results_long %>%
  filter(sample_type == "Human") %>%
  group_by(k, sample_type, list_num) %>%
  summarize(m_corr = mean(correlation),
            sd_corr = sd(correlation),
            se_corr = sd(correlation)/sqrt(n()),
            .groups = "drop_last") %>%
  mutate(sample_type = toTitleCase(sample_type))
### Per-list trajectories of the human-sample correlation across k, with the
### mean LLM correlation as a dotted blue line.
df_results_summ %>%
ggplot(aes(x = k, y = m_corr)) +
geom_point(aes(color=factor(list_num)), alpha = .5, size = 2) + # Add points
geom_line(aes(color=factor(list_num))) + # Connect points with lines
labs(x="Number of Participants", y="Correlation with Valence Norms", color = "List", shape = "List") +
theme_minimal() +
geom_hline(yintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue",
alpha = .8) +
theme(text = element_text(size = 15),
legend.position="bottom")
We also ask whether list-wise variation in the human sample correlations with the valence norms is related to list-wise variation in the LLM correlations.
### Collapse to one row per list (max/min/mean human correlation across k),
### then attach the LLM's per-list correlations.
### `by = "list_num"` made explicit (matches the implicit join reported in the
### original output; silences the join message without changing behavior).
df_results_summ_list = df_results_summ %>%
  group_by(list_num) %>%
  summarise(max_corr_human = max(m_corr),
            min_corr_human = min(m_corr),
            mean_corr_human = mean(m_corr)) %>%
  inner_join(df_llm, by = "list_num")
### Scatter of list-wise LLM correlation vs. list-wise human-sample
### correlation, with a linear fit, followed by the corresponding test.
df_results_summ_list %>%
ggplot(aes(x = spearman_llm,
y = mean_corr_human)) +
geom_point(size = 4, alpha = .4) +
geom_smooth(method = "lm") +
theme_minimal() +
labs(x = "List-wise LLM Correlation",
y = "List-wise Human Sample Correlation")
## `geom_smooth()` using formula 'y ~ x'
cor.test(df_results_summ_list$spearman_llm, df_results_summ_list$mean_corr_human)
##
## Pearson's product-moment correlation
##
## data: df_results_summ_list$spearman_llm and df_results_summ_list$mean_corr_human
## t = 2.5354, df = 15, p-value = 0.02285
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09102517 0.81405100
## sample estimates:
## cor
## 0.5477073
### NOTE(review): this cor.test call (and its output) exactly duplicates the
### one immediately above — likely a copy-paste repeat; consider removing.
cor.test(df_results_summ_list$spearman_llm, df_results_summ_list$mean_corr_human)
##
## Pearson's product-moment correlation
##
## data: df_results_summ_list$spearman_llm and df_results_summ_list$mean_corr_human
## t = 2.5354, df = 15, p-value = 0.02285
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09102517 0.81405100
## sample estimates:
## cor
## 0.5477073
Finally, we ask whether list-wise variation in either measure can be predicted by list-wise variation in the words themselves.
# Point at the folder of per-list stimulus files (this sets a path variable,
# not the working directory).
folder_path <- "../../experiment/stimuli/glasgow_lists/"
# Create a list of CSV file paths
csv_files <- list.files(path = folder_path, pattern = "\\.csv$", full.names = TRUE)
# Read and combine the files into one data frame (one row per word per list).
combined_df <- map_df(csv_files, read_csv)
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 55 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, display
## dbl (1): list_number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table(combined_df$list_number)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 55
# Now merge with actual Glasgow norms
# Per-list summaries of the official valence norms: mean/SD of valence and
# mean word length. Joined to the stimulus lists by word (implicit join key).
df_values = read_csv("../../data/official/human/glasgow.csv") %>%
select(word, Valence.M, Valence.SD, Valence.N, Length) %>%
inner_join(combined_df) %>%
mutate(list_num = list_number) %>%
group_by(list_num) %>%
summarise(mean_valence = mean(Valence.M),
mean_valence_sd = mean(Valence.SD),
sd_valence = sd(Valence.M),
mean_length = mean(Length))
## Rows: 871 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): word
## dbl (28): Length, Arousal.M, Arousal.SD, Arousal.N, Valence.M, Valence.SD, V...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining, by = "word"
### Double-check observations per list
nrow(df_values)
## [1] 17
table(df_values$list_num)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
### Now merge with list-wise variation
### Joins on the shared `list_num` column (implicit join key).
df_merged = df_values %>%
left_join(df_results_summ_list)
## Joining, by = "list_num"
nrow(df_merged)
## [1] 17
### Does either the average or SD in valence predict variation?
### Predict list-wise human-sample correlation from list-level valence stats.
mod = lm(data = df_merged, mean_corr_human ~ mean_valence + sd_valence + mean_valence_sd)
summary(mod)
##
## Call:
## lm(formula = mean_corr_human ~ mean_valence + sd_valence + mean_valence_sd,
## data = df_merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18211 -0.02600 0.01709 0.07061 0.10135
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.28377 1.15217 -0.246 0.809
## mean_valence 0.08419 0.15830 0.532 0.604
## sd_valence 0.25301 0.27836 0.909 0.380
## mean_valence_sd 0.19285 0.43516 0.443 0.665
##
## Residual standard error: 0.09064 on 13 degrees of freedom
## Multiple R-squared: 0.07604, Adjusted R-squared: -0.1372
## F-statistic: 0.3566 on 3 and 13 DF, p-value: 0.7852
### No, but the average LLM correlation per list does.
### Equivalent to the cor.test above (same t, df, and p-value).
mod = lm(data = df_merged, mean_corr_human ~ spearman_llm)
summary(mod)
##
## Call:
## lm(formula = mean_corr_human ~ spearman_llm, data = df_merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.136535 -0.042413 0.009058 0.026781 0.102938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1880 0.2037 0.923 0.3708
## spearman_llm 0.6865 0.2708 2.535 0.0229 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07345 on 15 degrees of freedom
## Multiple R-squared: 0.3, Adjusted R-squared: 0.2533
## F-statistic: 6.428 on 1 and 15 DF, p-value: 0.02285
### setwd("/Users/seantrott/Dropbox/UCSD/Research/NLMs/llm_clt/src/analysis/")
### Read in all data
### Iconicity: resampled correlation results (overwrites `df_all_results`).
df_all_results = read_csv("../../data/processed/iconicity_results.csv")
## Rows: 61240 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): k, combo_index, list_num, n, pearson_centaur1, spearman_centaur1, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nrow(df_all_results)
## [1] 61240
### How many per list?
table(df_all_results$list_num)
##
## 1 2 3 4 5 6 7 8 9 10
## 5753 6810 7340 5768 6278 5686 3735 6242 6289 7339
### LLM data
### Per-list LLM (GPT-4) correlations with the iconicity norms.
df_llm = read_csv("../../data/processed/iconicity_llm_corrs.csv")
## Rows: 10 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): list_num, n, pearson_llm, spearman_llm
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Mean and SD of the LLM's Spearman correlation across lists.
df_llm %>%
summarise(m_spearman = mean(spearman_llm),
sd_spearman = sd(spearman_llm))
## # A tibble: 1 × 2
## m_spearman sd_spearman
## <dbl> <dbl>
## 1 0.595 0.103
### Single-participant (k = 1) samples only.
df_individuals = df_all_results %>%
filter(k == 1)
### Mean and SD of individual-participant correlations.
mean(df_individuals$spearman_ppt)
## [1] 0.3673823
sd(df_individuals$spearman_ppt)
## [1] 0.1319105
### Distribution of individual-participant correlations, with the mean LLM
### correlation overlaid as a dotted blue vertical line.
df_individuals %>%
ggplot(aes(x = spearman_ppt)) +
geom_histogram(alpha = .5) +
labs(x="Correlation with Original Iconicity Norms", y="Count") +
theme_minimal() +
geom_vline(xintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue", alpha = .8) +
theme(text = element_text(size = 15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Pivot
### Reshape to long format: one row per (k, combo, list) x sample type, with
### the Spearman correlation in a single `correlation` column. The
### "spearman_" prefix is stripped during the pivot, and the participant
### ("ppt") samples are relabeled "Human".
df_results_long = df_all_results %>%
  select(spearman_centaur1, spearman_centaur2, spearman_ppt,
         k, combo_index, list_num) %>%
  pivot_longer(cols = starts_with("spearman_"),
               names_to = "sample_type",
               names_prefix = "spearman_",
               values_to = "correlation") %>%
  mutate(sample_type = if_else(sample_type == "ppt", "Human", sample_type))
### Visualize
### Mean correlation (+/- SE) per sample size k and sample type.
### `.groups = "drop_last"` reproduces the default behavior (result grouped
### by `k`) while silencing the summarise() grouping message.
df_results_summ = df_results_long %>%
  group_by(k, sample_type) %>%
  summarize(m_corr = mean(correlation),
            sd_corr = sd(correlation),
            se_corr = sd(correlation)/sqrt(n()),
            .groups = "drop_last") %>%
  mutate(sample_type = toTitleCase(sample_type))
### Mean correlation by number of participants (k) and sample type.
### Error bars are +/- 2 SE; dotted blue line is the mean LLM correlation.
### NOTE(review): `my_colors` is defined elsewhere in the document — assumed in scope.
df_results_summ %>%
ggplot(aes(x = k, y = m_corr)) +
geom_point(aes(color=factor(sample_type), shape = factor(sample_type)), size=3, alpha = .5) + # Add points
geom_line(aes(color=factor(sample_type))) + # Connect points with lines
geom_errorbar(aes(ymin=m_corr-se_corr * 2, ymax=m_corr+se_corr * 2, width=0.2, color = factor(sample_type))) +
labs(x="Number of Participants", y="Correlation with Iconicity Norms", color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_hline(yintercept = mean(df_llm$spearman_llm), ### LLM
linetype = "dotted", color = "blue",
alpha = .8) +
theme(text = element_text(size = 15),
legend.position="bottom") +
scale_color_manual(values = my_colors)
Here, we visualize projected differences in quality and cost of a human sample vs. GPT-4.
For human cost, we assume a fixed hourly rate of pay and a fixed number of seconds per judgment (the `RATE` and `SECONDS_PER_JUDGMENT` constants below).
For GPT-4 cost, we assume a fixed price per 1K tokens sampled and generated, along with a fixed number of tokens sampled and generated per judgment (the remaining constants below).
### Human assumptions: participants are paid RATE dollars per hour and a
### single judgment takes SECONDS_PER_JUDGMENT seconds.
RATE = 12
SECONDS_PER_JUDGMENT = 5
### Cost per judgment = hourly rate / (judgments per hour).
HUMAN_CPJ = RATE / (3600/SECONDS_PER_JUDGMENT)
### GPT-4 assumptions: price per 1K tokens sampled (prompt) and generated
### (completion), plus tokens of each kind consumed per judgment.
### NOTE(review): the per-1K prices are taken as given here — confirm them
### against the pricing actually paid.
COST_PER_1K_SAMPLED = 0.0003
COST_PER_1K_GENERATED = 0.0006
NUM_SAMPLED_PER_JUDGMENT = 20
NUM_GENERATED_PER_JUDGMENT = 10
### Cost per judgment = (tokens / 1000) * price per 1K, summed over sampled
### and generated tokens. Fix: the generated term previously carried a stray
### `1000 *` factor, making it inconsistent with the sampled term's formula
### and inflating GPT_CPJ by three orders of magnitude.
GPT_CPJ = (NUM_SAMPLED_PER_JUDGMENT / 1000) * COST_PER_1K_SAMPLED +
  (NUM_GENERATED_PER_JUDGMENT / 1000) * COST_PER_1K_GENERATED
### Visualize
### Quality ratio: a sample's correlation relative to the LLM's correlation
### on the same list. Cost ratio: a sample's cost per judgment relative to
### GPT-4's. Centaur samples pay for one LLM pass plus k human participants.
df_costs_summ = df_all_results %>%
mutate(ratio_human_quality = spearman_ppt / spearman_llm) %>%
mutate(ratio_human_cost = (k * HUMAN_CPJ) / GPT_CPJ) %>%
mutate(ratio_centaur1_quality = spearman_centaur1 / spearman_llm) %>%
mutate(ratio_centaur1_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
mutate(ratio_centaur2_quality = spearman_centaur2 / spearman_llm) %>%
mutate(ratio_centaur2_cost = (GPT_CPJ + k * HUMAN_CPJ) / GPT_CPJ) %>%
group_by(k) %>%
### Fix: the se_*_ratio_cost columns previously reported raw sd() while the
### quality columns reported sd()/sqrt(n()); they are now all standard
### errors. (Cost ratios are constant within each k, so these are 0 either
### way — the change is for consistency, not values.)
summarise(m_human_ratio_quality = mean(ratio_human_quality),
se_human_ratio_quality = sd(ratio_human_quality)/sqrt(n()),
m_human_ratio_cost = mean(ratio_human_cost),
se_human_ratio_cost = sd(ratio_human_cost)/sqrt(n()),
### Centaur1
m_centaur1_ratio_quality = mean(ratio_centaur1_quality),
se_centaur1_ratio_quality = sd(ratio_centaur1_quality)/sqrt(n()),
m_centaur1_ratio_cost = mean(ratio_centaur1_cost),
se_centaur1_ratio_cost = sd(ratio_centaur1_cost)/sqrt(n()),
### Centaur2
m_centaur2_ratio_quality = mean(ratio_centaur2_quality),
se_centaur2_ratio_quality = sd(ratio_centaur2_quality)/sqrt(n()),
m_centaur2_ratio_cost = mean(ratio_centaur2_cost),
se_centaur2_ratio_cost = sd(ratio_centaur2_cost)/sqrt(n()))
### Reshape the cost/quality summary to long format: one row per
### (k, sample_type), with separate columns for the mean and SE of both
### the quality and the cost ratios.
df_costs_summ_long <- df_costs_summ %>%
  pivot_longer(
    cols = -k,
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(
    sample_type = str_extract(variable, "human|centaur1|centaur2"),
    ### Fix: anchor the patterns (^...$) so classification depends on the
    ### full column name rather than an unanchored substring match, which
    ### could silently mis-bucket future column names.
    metric = case_when(
      str_detect(variable, "^m_.*_quality$") ~ "m_quality",
      str_detect(variable, "^se_.*_quality$") ~ "se_quality",
      str_detect(variable, "^m_.*_cost$") ~ "m_cost",
      str_detect(variable, "^se_.*_cost$") ~ "se_cost"
    )
  ) %>%
  select(-variable) %>%
  pivot_wider(
    names_from = metric,
    values_from = value
  ) %>%
  mutate(sample_type = toTitleCase(sample_type))
### Cost-quality tradeoff: each point along a line is one value of k; the
### dotted vertical line marks parity with the LLM (quality ratio of 1).
ggplot(df_costs_summ_long,
       aes(x = m_quality, y = m_cost,
           color = sample_type, shape = sample_type)) +
  geom_point(size = 3, alpha = .5) +
  geom_line(alpha = .6) +
  geom_vline(xintercept = 1, linetype = "dotted") +
  labs(x="Quality Ratio", y="Cost Ratio",
       color = "Sample Type", shape = "Sample Type") +
  theme_minimal() +
  theme(text = element_text(size = 15),
        legend.position="bottom") +
  scale_color_manual(values = my_colors)
Checking individual lists for supplementary analysis:
### Per-list accumulation curves for human samples only.
### NOTE: this intentionally overwrites the earlier df_results_summ; the
### extra list_num grouping exposes variation across the 17 lists.
df_results_summ = df_results_long %>%
filter(sample_type == "Human") %>%
group_by(k, sample_type, list_num) %>%
summarize(m_corr = mean(correlation),
sd_corr = sd(correlation),
se_corr = sd(correlation)/sqrt(n())) %>%
mutate(sample_type = toTitleCase(sample_type))
## `summarise()` has grouped output by 'k', 'sample_type'. You can override using
## the `.groups` argument.
### One accumulation curve per list; the dotted blue line marks the mean
### LLM correlation. List color is mapped once in the global aes (geom_hline
### does not inherit aesthetics, so the override is safe).
ggplot(df_results_summ, aes(x = k, y = m_corr, color = factor(list_num))) +
  geom_point(alpha = .5, size = 2) +
  geom_line() +
  geom_hline(yintercept = mean(df_llm$spearman_llm), ### LLM
             linetype = "dotted", color = "blue",
             alpha = .8) +
  labs(x="Number of Participants", y="Correlation with Iconicity Norms", color = "List", shape = "List") +
  theme_minimal() +
  theme(text = element_text(size = 15),
        legend.position="bottom")
We also recalculate NNB within each list to ensure that it doesn’t depend hugely on this list-wise variation.
### Join each list's human summary with that list's own LLM correlation
### (from df_llm, keyed on list_num), then compute the per-list
### human-minus-LLM difference at each k.
### NOTE(review): "NNB" in the text above presumably refers to the
### number-needed-to-beat metric — confirm against the accompanying paper.
df_results_summ = df_results_summ %>%
left_join(df_llm) %>%
mutate(llm_diff = m_corr - spearman_llm)
## Joining, by = "list_num"
### Curves above 0 indicate the human sample beats GPT-4 for that list/k.
df_results_summ %>%
ggplot(aes(x = k, y = llm_diff)) +
geom_point(aes(color=factor(list_num)), size=2, alpha = .5) + # Add points
geom_line(aes(color=factor(list_num))) + # Connect points with lines
labs(x="Number of Participants", y="Difference (Human - GPT-4)", color = "Sample Type", shape = "Sample Type") +
theme_minimal() +
geom_hline(yintercept = 0, ### LLM
linetype = "dotted", color = "blue",
alpha = .8) +
theme(text = element_text(size = 15),
legend.position="bottom")